[Proposed] 불균형 데이터 (0.3/0.005)

Author

김보람

Published

February 5, 2024

imports

import pandas as pd
import numpy as np
import sklearn
import pickle 
import time 
import datetime
import warnings
warnings.filterwarnings('ignore')
%run function_proposed_gcn.py
# Load the pickled training data from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'fraudTrain.pkl' must come from a trusted source.
with open('fraudTrain.pkl', 'rb') as pkl_handle:
    fraudTrain = pickle.load(pkl_handle)
# Sweep: fixed fifth argument 0.3, ten values for the fourth argument.
# In the result table printed below this cell, the fourth argument appears in
# the `theta` column and the fifth in the `gamma` column.
gamma_fixed = 0.3
theta_grid = [
    80280.0,
    85280.0,
    75280.0,
    65280.0,
    55280.0,
    45280.0,
    35280.0,
    25280.0,
    15280.0,
    5280.0,
]

# The first run starts a fresh result table (no prev_results passed); every
# later run hands the table returned so far back in via prev_results.
first_theta, *remaining_thetas = theta_grid
df_results = try_1(fraudTrain, 0.3, 0.005, first_theta, gamma_fixed)
for theta in remaining_thetas:
    df_results = try_1(
        fraudTrain, 0.3, 0.005, theta, gamma_fixed, prev_results=df_results
    )

# Save the accumulated results under a timestamped name so reruns do not
# overwrite earlier files (at one-second resolution).
now_ts = time.time()
ymdhms = datetime.datetime.fromtimestamp(now_ts).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
# Bare expression: the notebook renders the table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.968531 0.126761 0.900000 0.222222 0.978308 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 80280.0 0.3
1 GCN None 0.973859 0.149171 0.900000 0.255924 0.981900 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 85280.0 0.3
2 GCN None 0.973693 0.148352 0.900000 0.254717 0.967682 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 75280.0 0.3
3 GCN None 0.971195 0.140704 0.933333 0.244541 0.965412 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 65280.0 0.3
4 GCN None 0.961871 0.103586 0.866667 0.185053 0.959438 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 55280.0 0.3
5 GCN None 0.970363 0.130000 0.866667 0.226087 0.958300 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 45280.0 0.3
6 GCN None 0.967699 0.116822 0.833333 0.204918 0.957937 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 35280.0 0.3
7 GCN None 0.966034 0.111607 0.833333 0.196850 0.956956 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 25280.0 0.3
8 GCN None 0.969697 0.127451 0.866667 0.222222 0.955840 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 15280.0 0.3
9 GCN None 0.973859 0.145251 0.866667 0.248804 0.958517 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 5280.0 0.3
# Sweep: fixed fifth argument 0.2, ten values for the fourth argument.
# In the result table printed below this cell, the fourth argument appears in
# the `theta` column and the fifth in the `gamma` column.
gamma_fixed = 0.2
theta_grid = [
    80280.0,
    85280.0,
    75280.0,
    65280.0,
    55280.0,
    45280.0,
    35280.0,
    25280.0,
    15280.0,
    5280.0,
]

# The first run starts a fresh result table (no prev_results passed); every
# later run hands the table returned so far back in via prev_results.
first_theta, *remaining_thetas = theta_grid
df_results = try_1(fraudTrain, 0.3, 0.005, first_theta, gamma_fixed)
for theta in remaining_thetas:
    df_results = try_1(
        fraudTrain, 0.3, 0.005, theta, gamma_fixed, prev_results=df_results
    )

# Save the accumulated results under a timestamped name so reruns do not
# overwrite earlier files (at one-second resolution).
now_ts = time.time()
ymdhms = datetime.datetime.fromtimestamp(now_ts).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
# Bare expression: the notebook renders the table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.969863 0.131707 0.900000 0.229787 0.979507 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 80280.0 0.2
1 GCN None 0.973693 0.148352 0.900000 0.254717 0.980455 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 85280.0 0.2
2 GCN None 0.972194 0.141361 0.900000 0.244344 0.980277 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 75280.0 0.2
3 GCN None 0.974359 0.151685 0.900000 0.259615 0.979351 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 65280.0 0.2
4 GCN None 0.973693 0.152174 0.933333 0.261682 0.981838 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 55280.0 0.2
5 GCN None 0.971195 0.140704 0.933333 0.244541 0.965646 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 45280.0 0.2
6 GCN None 0.973693 0.144444 0.866667 0.247619 0.957675 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 35280.0 0.2
7 GCN None 0.967366 0.115741 0.833333 0.203252 0.957753 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 25280.0 0.2
8 GCN None 0.972361 0.138298 0.866667 0.238532 0.957614 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 15280.0 0.2
9 GCN None 0.971861 0.136126 0.866667 0.235294 0.958478 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 5280.0 0.2
# Sweep: fixed fifth argument 0.3, ten larger values for the fourth argument.
# In the result table printed below this cell, the fourth argument appears in
# the `theta` column and the fifth in the `gamma` column.
# Note the grid is not evenly spaced: it starts at 90280 and then jumps to
# 105280 before continuing in steps of 10000.
gamma_fixed = 0.3
theta_grid = [
    90280.0,
    105280.0,
    115280.0,
    125280.0,
    135280.0,
    145280.0,
    155280.0,
    165280.0,
    175280.0,
    185280.0,
]

# The first run starts a fresh result table (no prev_results passed); every
# later run hands the table returned so far back in via prev_results.
first_theta, *remaining_thetas = theta_grid
df_results = try_1(fraudTrain, 0.3, 0.005, first_theta, gamma_fixed)
for theta in remaining_thetas:
    df_results = try_1(
        fraudTrain, 0.3, 0.005, theta, gamma_fixed, prev_results=df_results
    )

# Save the accumulated results under a timestamped name so reruns do not
# overwrite earlier files (at one-second resolution).
now_ts = time.time()
ymdhms = datetime.datetime.fromtimestamp(now_ts).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
# Bare expression: the notebook renders the table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.972527 0.142857 0.9 0.246575 0.980115 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 90280.0 0.3
1 GCN None 0.972028 0.140625 0.9 0.243243 0.977622 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 105280.0 0.3
2 GCN None 0.968698 0.127358 0.9 0.223140 0.978012 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 115280.0 0.3
3 GCN None 0.968864 0.127962 0.9 0.224066 0.979016 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 125280.0 0.3
4 GCN None 0.972694 0.143617 0.9 0.247706 0.987316 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 135280.0 0.3
5 GCN None 0.973693 0.148352 0.9 0.254717 0.987143 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 145280.0 0.3
6 GCN None 0.973193 0.145946 0.9 0.251163 0.987221 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 155280.0 0.3
7 GCN None 0.969697 0.131068 0.9 0.228814 0.986998 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 165280.0 0.3
8 GCN None 0.972194 0.141361 0.9 0.244344 0.986825 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 175280.0 0.3
9 GCN None 0.966533 0.120000 0.9 0.211765 0.986747 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 185280.0 0.3
# Sweep: fixed fifth argument 0.4, ten values for the fourth argument.
# In the result table printed below this cell, the fourth argument appears in
# the `theta` column and the fifth in the `gamma` column.
gamma_fixed = 0.4
theta_grid = [
    80280.0,
    85280.0,
    75280.0,
    65280.0,
    55280.0,
    45280.0,
    35280.0,
    25280.0,
    15280.0,
    5280.0,
]

# The first run starts a fresh result table (no prev_results passed); every
# later run hands the table returned so far back in via prev_results.
first_theta, *remaining_thetas = theta_grid
df_results = try_1(fraudTrain, 0.3, 0.005, first_theta, gamma_fixed)
for theta in remaining_thetas:
    df_results = try_1(
        fraudTrain, 0.3, 0.005, theta, gamma_fixed, prev_results=df_results
    )

# Save the accumulated results under a timestamped name so reruns do not
# overwrite earlier files (at one-second resolution).
now_ts = time.time()
ymdhms = datetime.datetime.fromtimestamp(now_ts).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
# Bare expression: the notebook renders the table as the cell output.
df_results
model time acc pre rec f1 auc graph_based method throw_rate train_size train_cols train_frate test_size test_frate hyper_params theta gamma
0 GCN None 0.965534 0.120172 0.933333 0.212928 0.960843 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 80280.0 0.4
1 GCN None 0.964702 0.117647 0.933333 0.208955 0.965367 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 85280.0 0.4
2 GCN None 0.962704 0.108871 0.900000 0.194245 0.962405 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 75280.0 0.4
3 GCN None 0.966533 0.116592 0.866667 0.205534 0.958261 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 65280.0 0.4
4 GCN None 0.967699 0.116822 0.833333 0.204918 0.957842 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 55280.0 0.4
5 GCN None 0.963869 0.105485 0.833333 0.187266 0.957759 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 45280.0 0.4
6 GCN None 0.966034 0.115044 0.866667 0.203125 0.957106 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 35280.0 0.4
7 GCN None 0.971528 0.134715 0.866667 0.233184 0.955489 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 25280.0 0.4
8 GCN None 0.972527 0.139037 0.866667 0.239631 0.956621 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 15280.0 0.4
9 GCN None 0.971029 0.132653 0.866667 0.230088 0.957218 True Proposed 0.3 14014 amt 0.426431 6006 0.005 None 5280.0 0.4
# Sweep: fixed fifth argument 0.5, eight values for the fourth argument.
# No output table is visible for this cell; by analogy with the tables of the
# earlier cells, the fourth and fifth arguments presumably correspond to the
# `theta` and `gamma` columns.
# The grid order is deliberately kept as written (not sorted), so the rows of
# the result table come out in the same order as before.
gamma_fixed = 0.5
theta_grid = [
    80280.0,
    105280.0,
    115280.0,
    125280.0,
    135280.0,
    85280.0,
    75280.0,
    65280.0,
]

# The first run starts a fresh result table (no prev_results passed); every
# later run hands the table returned so far back in via prev_results.
first_theta, *remaining_thetas = theta_grid
df_results = try_1(fraudTrain, 0.3, 0.005, first_theta, gamma_fixed)
for theta in remaining_thetas:
    df_results = try_1(
        fraudTrain, 0.3, 0.005, theta, gamma_fixed, prev_results=df_results
    )

# Save the accumulated results under a timestamped name so reruns do not
# overwrite earlier files (at one-second resolution).
now_ts = time.time()
ymdhms = datetime.datetime.fromtimestamp(now_ts).strftime('%Y%m%d-%H%M%S')
df_results.to_csv(f'./results/{ymdhms}-proposed.csv', index=False)
# Bare expression: the notebook renders the table as the cell output.
df_results